# PROJECT DATASETS/INSPIRATION FOUND AT
# https://github.com/KeithGalli/Pandas-Data-Science-Tasks/tree/master/SalesAnalysis/Sales_Data
# IMPORTING LIBRARIES/DATASETS
import os
import pandas as pd
import numpy as np
pd.options.mode.chained_assignment = None
# ADDING 'pd.options.mode.chained_assignment = None' REMOVES THE 'SettingWithCopyWarning' THAT WILL
# SHOW WHEN RUNNING CODE LATER ON IN THE PROJECT
import plotly.express as px
import plotly.graph_objects as go
jan_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_January_2019.csv')
feb_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_February_2019.csv')
mar_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_March_2019.csv')
apr_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_April_2019.csv')
may_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_May_2019.csv')
june_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_June_2019.csv')
july_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_July_2019.csv')
aug_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_August_2019.csv')
sept_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_September_2019.csv')
oct_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_October_2019.csv')
nov_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_November_2019.csv')
dec_sales_2019 = pd.read_csv(r'C:\Users\jksmi\Downloads\salesproductproject\Sales_December_2019.csv')
# ALL DATASETS WERE CLEANED OF NULL VALUES IN EXCEL PRIOR TO IMPORT
# OBSERVING THE 'jan_sales_2019' DATAFRAME
jan_sales_2019.head()
| Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address | |
|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 |
# CREATING A LIST OF ALL THE MONTHS
#IN ORDER TO CONCAT AND CREATE ONE SALES DATAFRAME FOR 2019
sales_list = [jan_sales_2019, feb_sales_2019,mar_sales_2019,apr_sales_2019,
may_sales_2019,june_sales_2019,july_sales_2019,
aug_sales_2019,sept_sales_2019,oct_sales_2019,
nov_sales_2019,dec_sales_2019]
# CONCATENATING THE 'sales_list' TO CREATE ONE SALES DATAFRAME FOR 2019 NAMED 'sales_2019'
sales_2019 = pd.concat(sales_list)
# CHECKING THE 'sales_2019' DATAFRAME
sales_2019
| Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address | |
|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 |
| ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | Lightning Charging Cable | 1 | 14.95 | 12/11/2019 20:58 | 14 Madison St, San Francisco, CA 94016 |
| 24985 | 319667 | AA Batteries (4-pack) | 2 | 3.84 | 12/1/2019 12:01 | 549 Willow St, Los Angeles, CA 90001 |
| 24986 | 319668 | Vareebadd Phone | 1 | 400.00 | 12/9/2019 6:43 | 273 Wilson St, Seattle, WA 98101 |
| 24987 | 319669 | Wired Headphones | 1 | 11.99 | 12/3/2019 10:39 | 778 River St, Dallas, TX 75001 |
| 24988 | 319670 | Bose SoundSport Headphones | 1 | 99.99 | 12/21/2019 21:45 | 747 Chestnut St, Los Angeles, CA 90001 |
185950 rows × 6 columns
# OBSERVING THE DATA TYPES OF THE COLUMNS IN THE DATAFRAME
sales_2019.dtypes
Order ID int64 Product object Quantity Ordered int64 Price Each float64 Order Date object Purchase Address object dtype: object
# CHECKING FOR NULL VALUES
sales_2019.isnull().sum()
Order ID 0 Product 0 Quantity Ordered 0 Price Each 0 Order Date 0 Purchase Address 0 dtype: int64
# CHECKING FOR THE NUMBER OF DUPLICATE ROWS IN THE 'sales_2019' DATAFRAME
sales_2019.duplicated().sum()
264
# UPDATING THE 'sales_2019' DATAFRAME WITHOUT THE DUPLICATE ROWS
sales_2019 = sales_2019.drop_duplicates()
# CHECKING THE NUMBER OF DUPLICATE ROWS IN THE 'sales_2019' DATAFRAME AGAIN TO SEE IF UPDATE TOOK
sales_2019.duplicated().sum()
0
# CREATING A 'Customer ID' column based on the unique values in the 'Purchase Address' COLUMN
sales_2019['Customer ID'] = sales_2019.groupby('Purchase Address').ngroup()+100000
# CHECKING THE 'Customer ID' column in the 'sales_2019' DATAFRAME
sales_2019
| Order ID | Product | Quantity Ordered | Price Each | Order Date | Purchase Address | Customer ID | |
|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | Lightning Charging Cable | 1 | 14.95 | 12/11/2019 20:58 | 14 Madison St, San Francisco, CA 94016 | 106487 |
| 24985 | 319667 | AA Batteries (4-pack) | 2 | 3.84 | 12/1/2019 12:01 | 549 Willow St, Los Angeles, CA 90001 | 170444 |
| 24986 | 319668 | Vareebadd Phone | 1 | 400.00 | 12/9/2019 6:43 | 273 Wilson St, Seattle, WA 98101 | 127410 |
| 24987 | 319669 | Wired Headphones | 1 | 11.99 | 12/3/2019 10:39 | 778 River St, Dallas, TX 75001 | 206162 |
| 24988 | 319670 | Bose SoundSport Headphones | 1 | 99.99 | 12/21/2019 21:45 | 747 Chestnut St, Los Angeles, CA 90001 | 201282 |
185686 rows × 7 columns
# CHECKING THE NUMBER OF UNIQUE CUSTOMER ID's IN THE 'sales_2019' DATAFRAME
sales_2019['Customer ID'].nunique()
140787
# CHANGING THE COLUMN NAMES IN THE DATAFRAME
new_column_list = ['order_id','product','qty_ordered',
'price_each','order_date','purchase_address',
'customer_id']
sales_2019.columns = new_column_list
# OBSERVING THE COLUMN NAME CHANGE
sales_2019
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | |
|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | Lightning Charging Cable | 1 | 14.95 | 12/11/2019 20:58 | 14 Madison St, San Francisco, CA 94016 | 106487 |
| 24985 | 319667 | AA Batteries (4-pack) | 2 | 3.84 | 12/1/2019 12:01 | 549 Willow St, Los Angeles, CA 90001 | 170444 |
| 24986 | 319668 | Vareebadd Phone | 1 | 400.00 | 12/9/2019 6:43 | 273 Wilson St, Seattle, WA 98101 | 127410 |
| 24987 | 319669 | Wired Headphones | 1 | 11.99 | 12/3/2019 10:39 | 778 River St, Dallas, TX 75001 | 206162 |
| 24988 | 319670 | Bose SoundSport Headphones | 1 | 99.99 | 12/21/2019 21:45 | 747 Chestnut St, Los Angeles, CA 90001 | 201282 |
185686 rows × 7 columns
# CREATING A COLUMN THAT SHOWS THE TOTAL ORDER AMOUNT FOR EACH ORDER (NAMING THE column 'order_total')
sales_2019['order_total'] = sales_2019['qty_ordered'] * sales_2019['price_each']
# OBSERVING THE 'order_total' COLUMN IN THE 'sales_2019' DATAFRAME
sales_2019.head(10)
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | |
|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 |
| 5 | 141239 | AAA Batteries (4-pack) | 1 | 2.99 | 1/29/2019 20:22 | 775 Willow St, San Francisco, CA 94016 | 205768 | 2.99 |
| 6 | 141240 | 27in 4K Gaming Monitor | 1 | 389.99 | 1/26/2019 12:16 | 979 Park St, Los Angeles, CA 90001 | 237633 | 389.99 |
| 7 | 141241 | USB-C Charging Cable | 1 | 11.95 | 1/5/2019 12:04 | 181 6th St, San Francisco, CA 94016 | 112853 | 11.95 |
| 8 | 141242 | Bose SoundSport Headphones | 1 | 99.99 | 1/1/2019 10:30 | 867 Willow St, Los Angeles, CA 90001 | 220222 | 99.99 |
| 9 | 141243 | Apple Airpods Headphones | 1 | 150.00 | 1/22/2019 21:20 | 657 Johnson St, San Francisco, CA 94016 | 187333 | 150.00 |
# NOW TO SPLIT THE 'purchase_address' TO CREATE
# 'address', 'city', 'state', 'zip_code' COLUMNS INTO THE 'sales_2019' DATAFRAME
# NOW TO SPLIT THE ADDRESS
sales_2019['address'] = sales_2019['purchase_address'].apply(lambda x: x.split(',')[0])
sales_2019['city'] = sales_2019['purchase_address'].apply(lambda x: x.split(',')[1])
sales_2019['state'] = sales_2019['purchase_address'].apply(lambda x: x.split(',')[2].split(' ')[1])
sales_2019['zip_code'] = sales_2019['purchase_address'].apply(lambda x: x.split(',')[2].split(' ')[2])
sales_2019
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 | 387 10th St | Austin | TX | 73301 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | Lightning Charging Cable | 1 | 14.95 | 12/11/2019 20:58 | 14 Madison St, San Francisco, CA 94016 | 106487 | 14.95 | 14 Madison St | San Francisco | CA | 94016 |
| 24985 | 319667 | AA Batteries (4-pack) | 2 | 3.84 | 12/1/2019 12:01 | 549 Willow St, Los Angeles, CA 90001 | 170444 | 7.68 | 549 Willow St | Los Angeles | CA | 90001 |
| 24986 | 319668 | Vareebadd Phone | 1 | 400.00 | 12/9/2019 6:43 | 273 Wilson St, Seattle, WA 98101 | 127410 | 400.00 | 273 Wilson St | Seattle | WA | 98101 |
| 24987 | 319669 | Wired Headphones | 1 | 11.99 | 12/3/2019 10:39 | 778 River St, Dallas, TX 75001 | 206162 | 11.99 | 778 River St | Dallas | TX | 75001 |
| 24988 | 319670 | Bose SoundSport Headphones | 1 | 99.99 | 12/21/2019 21:45 | 747 Chestnut St, Los Angeles, CA 90001 | 201282 | 99.99 | 747 Chestnut St | Los Angeles | CA | 90001 |
185686 rows × 12 columns
sales_2019.head()
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 1/22/2019 21:25 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 1/28/2019 14:15 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 1/17/2019 13:33 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 1/5/2019 20:33 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 1/25/2019 11:59 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 | 387 10th St | Austin | TX | 73301 |
# CHANGING THE 'order_date' TO DATE TO REMOVE THE TIMESTAMP
sales_2019['order_date'] = pd.to_datetime(sales_2019['order_date']).dt.date
sales_2019.head()
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 2019-01-22 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 2019-01-28 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 2019-01-17 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 2019-01-05 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 2019-01-25 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 | 387 10th St | Austin | TX | 73301 |
# CHECKING THE DATA TYPE OF THE 'order_date' COLUMN
sales_2019['order_date'].dtypes
dtype('O')
# CHANGING THE 'order_date' COLUMN TO THE 'datetime64[ns]' DATA TYPE
sales_2019['order_date'] = sales_2019['order_date'].astype('datetime64[ns]')
# CREATING A 'month' AND 'day' COLUMN IN THE 'sales_2019' DATAFRAME
sales_2019['month'] = sales_2019['order_date'].dt.strftime('%m')
sales_2019['day'] = sales_2019['order_date'].dt.strftime('%d')
# OBSERVING THE ADDITION OF THE 'month' AND 'day' COLUMNS TO THE 'sales_2019' DATAFRAME
sales_2019.head()
| order_id | product | qty_ordered | price_each | order_date | purchase_address | customer_id | order_total | address | city | state | zip_code | month | day | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | iPhone | 1 | 700.00 | 2019-01-22 | 944 Walnut St, Boston, MA 02215 | 232266 | 700.00 | 944 Walnut St | Boston | MA | 02215 | 01 | 22 |
| 1 | 141235 | Lightning Charging Cable | 1 | 14.95 | 2019-01-28 | 185 Maple St, Portland, OR 97035 | 113519 | 14.95 | 185 Maple St | Portland | OR | 97035 | 01 | 28 |
| 2 | 141236 | Wired Headphones | 2 | 11.99 | 2019-01-17 | 538 Adams St, San Francisco, CA 94016 | 168669 | 23.98 | 538 Adams St | San Francisco | CA | 94016 | 01 | 17 |
| 3 | 141237 | 27in FHD Monitor | 1 | 149.99 | 2019-01-05 | 738 10th St, Los Angeles, CA 90001 | 199824 | 149.99 | 738 10th St | Los Angeles | CA | 90001 | 01 | 05 |
| 4 | 141238 | Wired Headphones | 1 | 11.99 | 2019-01-25 | 387 10th St, Austin, TX 73301 | 144894 | 11.99 | 387 10th St | Austin | TX | 73301 | 01 | 25 |
# CHANGING THE ORDER OF THE COLUMNS IN THE DATAFRAME
sales_2019 = sales_2019[['order_id','customer_id','order_date',
'month','day','product','qty_ordered',
'price_each','order_total','purchase_address',
'address','city','state','zip_code']]
# OBSERVING THE NEW COLUMN ORDER IN THE 'sales_2019' DATAFRAME
sales_2019
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | 232266 | 2019-01-22 | 01 | 22 | iPhone | 1 | 700.00 | 700.00 | 944 Walnut St, Boston, MA 02215 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | 113519 | 2019-01-28 | 01 | 28 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 185 Maple St, Portland, OR 97035 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | 168669 | 2019-01-17 | 01 | 17 | Wired Headphones | 2 | 11.99 | 23.98 | 538 Adams St, San Francisco, CA 94016 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 199824 | 2019-01-05 | 01 | 05 | 27in FHD Monitor | 1 | 149.99 | 149.99 | 738 10th St, Los Angeles, CA 90001 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | 144894 | 2019-01-25 | 01 | 25 | Wired Headphones | 1 | 11.99 | 11.99 | 387 10th St, Austin, TX 73301 | 387 10th St | Austin | TX | 73301 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 24984 | 319666 | 106487 | 2019-12-11 | 12 | 11 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 14 Madison St, San Francisco, CA 94016 | 14 Madison St | San Francisco | CA | 94016 |
| 24985 | 319667 | 170444 | 2019-12-01 | 12 | 01 | AA Batteries (4-pack) | 2 | 3.84 | 7.68 | 549 Willow St, Los Angeles, CA 90001 | 549 Willow St | Los Angeles | CA | 90001 |
| 24986 | 319668 | 127410 | 2019-12-09 | 12 | 09 | Vareebadd Phone | 1 | 400.00 | 400.00 | 273 Wilson St, Seattle, WA 98101 | 273 Wilson St | Seattle | WA | 98101 |
| 24987 | 319669 | 206162 | 2019-12-03 | 12 | 03 | Wired Headphones | 1 | 11.99 | 11.99 | 778 River St, Dallas, TX 75001 | 778 River St | Dallas | TX | 75001 |
| 24988 | 319670 | 201282 | 2019-12-21 | 12 | 21 | Bose SoundSport Headphones | 1 | 99.99 | 99.99 | 747 Chestnut St, Los Angeles, CA 90001 | 747 Chestnut St | Los Angeles | CA | 90001 |
185686 rows × 14 columns
# OBSERVING THE DIFFERENT CITY/STATE COMBINATIONS AND THE NUBMER OF TIMES THEY APPEAR IN THE DATAFRAME
sales_2019[['city','state']].value_counts()
city state San Francisco CA 44662 Los Angeles CA 29564 New York City NY 24847 Boston MA 19901 Atlanta GA 14863 Dallas TX 14797 Seattle WA 14713 Portland OR 9998 Austin TX 9890 Portland ME 2451 dtype: int64
# FINDING THE SALES TOTAL FOR EACH MONTH
monthly_sales = sales_2019.groupby('month')[['order_total']].agg('sum').reset_index()
monthly_sales
| month | order_total | |
|---|---|---|
| 0 | 01 | 1821413.16 |
| 1 | 02 | 2200078.08 |
| 2 | 03 | 2804973.35 |
| 3 | 04 | 3389217.98 |
| 4 | 05 | 3150616.23 |
| 5 | 06 | 2576280.15 |
| 6 | 07 | 2646461.32 |
| 7 | 08 | 2241083.37 |
| 8 | 09 | 2094465.69 |
| 9 | 10 | 3734777.86 |
| 10 | 11 | 3197875.05 |
| 11 | 12 | 4608295.70 |
# NOW TO VISUALIZE
monthly_sales_graph = px.line(monthly_sales,x=['Jan','Feb','Mar','Apr',
'May','Jun','Jul','Aug',
'Sept','Oct','Nov','Dec']
,y='order_total',
title='Sales by Month',
labels=dict(x='Month',order_total='Sales($)'))
monthly_sales_graph.show()
# FINDING THE SALES TOTAL OF EACH PRODUCT FOR EACH MONTH
monthly_product_sales = sales_2019.groupby(['product','month'])[['order_total']].agg('sum').reset_index()
monthly_product_sales
| product | month | order_total | |
|---|---|---|---|
| 0 | 20in Monitor | 01 | 23977.82 |
| 1 | 20in Monitor | 02 | 27057.54 |
| 2 | 20in Monitor | 03 | 35746.75 |
| 3 | 20in Monitor | 04 | 43116.08 |
| 4 | 20in Monitor | 05 | 37506.59 |
| ... | ... | ... | ... |
| 223 | iPhone | 08 | 307300.00 |
| 224 | iPhone | 09 | 277900.00 |
| 225 | iPhone | 10 | 501200.00 |
| 226 | iPhone | 11 | 465500.00 |
| 227 | iPhone | 12 | 634200.00 |
228 rows × 3 columns
# NOW TO VISUALIZE
monthly_product_sales_graph = px.line(monthly_product_sales,x='month',y='order_total',
color='product',title='Monthly Product Sales',
labels={'month':'Month','order_total':'Sales($)','product':'Product'},
height=800)
monthly_product_sales_graph.show()
# FINDING THE SALES TOTALS FOR EACH STATE AND VISUALIZE
annual_state_totals = sales_2019.groupby('state')[['order_total']].agg('sum').sort_values('order_total', ascending=False).reset_index()
annual_state_totals
| state | order_total | |
|---|---|---|
| 0 | CA | 13703047.83 |
| 1 | NY | 4661867.14 |
| 2 | TX | 4583418.29 |
| 3 | MA | 3658627.65 |
| 4 | GA | 2794199.07 |
| 5 | WA | 2745046.02 |
| 6 | OR | 1870010.56 |
| 7 | ME | 449321.38 |
# VISUALIZING STATE SALES
state_sales = px.bar(annual_state_totals,x='state',y='order_total',
title='2019 Sales by State',
labels={'state':'','order_total':'Sales($)'})
state_sales.show()
# FINDING THE SALES TOTALS FOR EACH CITY
city_sales = sales_2019.groupby(['city','state'])[['order_total']].agg('sum').sort_values('order_total',ascending=False).reset_index()
city_sales
| city | state | order_total | |
|---|---|---|---|
| 0 | San Francisco | CA | 8254743.55 |
| 1 | Los Angeles | CA | 5448304.28 |
| 2 | New York City | NY | 4661867.14 |
| 3 | Boston | MA | 3658627.65 |
| 4 | Atlanta | GA | 2794199.07 |
| 5 | Dallas | TX | 2765373.96 |
| 6 | Seattle | WA | 2745046.02 |
| 7 | Portland | OR | 1870010.56 |
| 8 | Austin | TX | 1818044.33 |
| 9 | Portland | ME | 449321.38 |
# MODIFYING THE 'city_sales' DATAFRAME TO INCLUDE THE STATE WITH THE CITY, SINCE THERE ARE TWO 'Portlands' IN THE DATASET
# 'Portland, OR' AND 'Portland, ME'
city_sales['city'] = city_sales['city'] + '(' + city_sales['state'] + ')'
city_sales
| city | state | order_total | |
|---|---|---|---|
| 0 | San Francisco(CA) | CA | 8254743.55 |
| 1 | Los Angeles(CA) | CA | 5448304.28 |
| 2 | New York City(NY) | NY | 4661867.14 |
| 3 | Boston(MA) | MA | 3658627.65 |
| 4 | Atlanta(GA) | GA | 2794199.07 |
| 5 | Dallas(TX) | TX | 2765373.96 |
| 6 | Seattle(WA) | WA | 2745046.02 |
| 7 | Portland(OR) | OR | 1870010.56 |
| 8 | Austin(TX) | TX | 1818044.33 |
| 9 | Portland(ME) | ME | 449321.38 |
# NOW TO VISUALIZE
city_sales_graph = px.bar(city_sales,x='state',y='order_total',
color='city',
title='State Sales with City Breakdown',
labels={'order_total':'Sales($)','state':'','city':'City'})
city_sales_graph.show()
city_sales_graph2 = px.sunburst(city_sales,path=['state','city'],values='order_total',width=800,height=800)
city_sales_graph2.show()
# FINDING THE BREAKDOWN OF SALES TOTALS FOR EACH STATE PER MONTH
monthly_state_totals = sales_2019.groupby(['state','month'])[['order_total']].agg('sum').reset_index()
monthly_state_totals
| state | month | order_total | |
|---|---|---|---|
| 0 | CA | 01 | 724151.34 |
| 1 | CA | 02 | 888850.28 |
| 2 | CA | 03 | 1122715.72 |
| 3 | CA | 04 | 1362467.59 |
| 4 | CA | 05 | 1274553.77 |
| ... | ... | ... | ... |
| 91 | WA | 08 | 189335.10 |
| 92 | WA | 09 | 156369.27 |
| 93 | WA | 10 | 316841.32 |
| 94 | WA | 11 | 252068.18 |
| 95 | WA | 12 | 386005.95 |
96 rows × 3 columns
# FINDING THE TOTAL NUMBER OF EACH PRODUCT ORDERED PER MONTH
monthly_products_ordered = sales_2019.groupby(['product','month'])[['qty_ordered']].agg('sum').reset_index()
monthly_products_ordered
| product | month | qty_ordered | |
|---|---|---|---|
| 0 | 20in Monitor | 01 | 218 |
| 1 | 20in Monitor | 02 | 246 |
| 2 | 20in Monitor | 03 | 325 |
| 3 | 20in Monitor | 04 | 392 |
| 4 | 20in Monitor | 05 | 341 |
| ... | ... | ... | ... |
| 223 | iPhone | 08 | 439 |
| 224 | iPhone | 09 | 397 |
| 225 | iPhone | 10 | 716 |
| 226 | iPhone | 11 | 665 |
| 227 | iPhone | 12 | 906 |
228 rows × 3 columns
# FINDING THE TOTAL NUMBER OF PRODUCTS ORDERED FOR 2019
products_ordered = sales_2019.groupby('product')[['qty_ordered']].agg('sum').sort_values(by='qty_ordered',ascending=False).reset_index()
products_ordered
| product | qty_ordered | |
|---|---|---|
| 0 | AAA Batteries (4-pack) | 30986 |
| 1 | AA Batteries (4-pack) | 27615 |
| 2 | USB-C Charging Cable | 23931 |
| 3 | Lightning Charging Cable | 23169 |
| 4 | Wired Headphones | 20524 |
| 5 | Apple Airpods Headphones | 15637 |
| 6 | Bose SoundSport Headphones | 13430 |
| 7 | 27in FHD Monitor | 7541 |
| 8 | iPhone | 6847 |
| 9 | 27in 4K Gaming Monitor | 6239 |
| 10 | 34in Ultrawide Monitor | 6192 |
| 11 | Google Phone | 5529 |
| 12 | Flatscreen TV | 4813 |
| 13 | Macbook Pro Laptop | 4725 |
| 14 | ThinkPad Laptop | 4128 |
| 15 | 20in Monitor | 4126 |
| 16 | Vareebadd Phone | 2068 |
| 17 | LG Washing Machine | 666 |
| 18 | LG Dryer | 646 |
# NOW TO VISUALIZE
total_products_ordered = px.bar(products_ordered, x='product', y='qty_ordered',
title="Products Ordered in 2019",
labels={'product':'','qty_ordered':'Number of Products Ordered'})
total_products_ordered.show()
# TOTAL NUMBER OF ORDERS PER MONTH
total_monthly_orders = sales_2019.groupby('month')[['order_id']].agg('count').reset_index()
total_monthly_orders
| month | order_id | |
|---|---|---|
| 0 | 01 | 9699 |
| 1 | 02 | 11957 |
| 2 | 03 | 15128 |
| 3 | 04 | 18257 |
| 4 | 05 | 16552 |
| 5 | 06 | 13535 |
| 6 | 07 | 14275 |
| 7 | 08 | 11943 |
| 8 | 09 | 11603 |
| 9 | 10 | 20249 |
| 10 | 11 | 17544 |
| 11 | 12 | 24944 |
monthly_orders = px.bar(total_monthly_orders,x=['Jan','Feb','Mar','Apr',
'May','Jun','Jul','Aug',
'Sept','Oct','Nov','Dec'],
y='order_id',
title="Total Monthly Orders",
labels=dict(x='Month', order_id='Orders'),
text='order_id')
monthly_orders.show()
# BREAKDOWN OF TOTAL MONTHLY ORDERS BY STATE
monthly_order_count = sales_2019.groupby(['month','state'])[['order_id']].agg('count').reset_index()
monthly_order_count
| month | state | order_id | |
|---|---|---|---|
| 0 | 01 | CA | 3892 |
| 1 | 01 | GA | 785 |
| 2 | 01 | MA | 1048 |
| 3 | 01 | ME | 123 |
| 4 | 01 | NY | 1310 |
| ... | ... | ... | ... |
| 91 | 12 | ME | 315 |
| 92 | 12 | NY | 3399 |
| 93 | 12 | OR | 1329 |
| 94 | 12 | TX | 3351 |
| 95 | 12 | WA | 1975 |
96 rows × 3 columns
monthly_state_orders = px.bar(monthly_order_count,x='month',y='order_id',
color='state',
title='Total Monthly Orders by State',
labels={'month':'Month','order_id':'Orders','state':'State'})
monthly_state_orders.show()
# LOCATING ORDERS THAT HAVE MULTIPLE ITEMS (PRODUCTS)
## CUSTOMERS ORDERING MORE THAN ONE ITEM
multiple_item_orders = sales_2019[sales_2019['order_id'].duplicated(keep=False)]
multiple_item_orders.head()
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 41 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable | 1 | 11.95 | 11.95 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 42 | 141275 | 180173 | 2019-01-07 | 01 | 07 | Wired Headphones | 1 | 11.99 | 11.99 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 57 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones | 1 | 150.00 | 150.00 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 58 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack) | 3 | 3.84 | 11.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 133 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone | 1 | 400.00 | 400.00 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
# SORTING THE PRODUCTS TO HAVE ONE CONSISTENT ORDER WHEN IT COMES TIME TO GROUP THEM TOGETHER
# BASED ON ORDER_ID
multiple_item_orders = multiple_item_orders.groupby(['order_id']).apply(lambda x: x.sort_values(['product']))
multiple_item_orders
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| order_id | |||||||||||||||
| 141275 | 41 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable | 1 | 11.95 | 11.95 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 42 | 141275 | 180173 | 2019-01-07 | 01 | 07 | Wired Headphones | 1 | 11.99 | 11.99 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 | |
| 141290 | 58 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack) | 3 | 3.84 | 11.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 57 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones | 1 | 150.00 | 150.00 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 | |
| 141365 | 133 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone | 1 | 400.00 | 400.00 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 319584 | 24899 | 319584 | 168610 | 2019-12-11 | 12 | 11 | iPhone | 1 | 700.00 | 700.00 | 537 Sunset St, Portland, OR 97035 | 537 Sunset St | Portland | OR | 97035 |
| 319596 | 24913 | 319596 | 152714 | 2019-12-01 | 12 | 01 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 |
| 24912 | 319596 | 152714 | 2019-12-01 | 12 | 01 | iPhone | 1 | 700.00 | 700.00 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 | |
| 319631 | 24948 | 319631 | 141320 | 2019-12-17 | 12 | 17 | 34in Ultrawide Monitor | 1 | 379.99 | 379.99 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
| 24949 | 319631 | 141320 | 2019-12-17 | 12 | 17 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
14128 rows × 14 columns
# RESETTING THE INDEX
multiple_item_orders.reset_index(drop=True, inplace=True)
multiple_item_orders.head(10)
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable | 1 | 11.95 | 11.95 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 1 | 141275 | 180173 | 2019-01-07 | 01 | 07 | Wired Headphones | 1 | 11.99 | 11.99 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 2 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack) | 3 | 3.84 | 11.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 3 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones | 1 | 150.00 | 150.00 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 4 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone | 1 | 400.00 | 400.00 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| 5 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Wired Headphones | 1 | 11.99 | 11.99 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| 6 | 141384 | 119608 | 2019-01-03 | 01 | 03 | Google Phone | 1 | 600.00 | 600.00 | 223 Jackson St, Boston, MA 02215 | 223 Jackson St | Boston | MA | 02215 |
| 7 | 141384 | 119608 | 2019-01-03 | 01 | 03 | USB-C Charging Cable | 1 | 11.95 | 11.95 | 223 Jackson St, Boston, MA 02215 | 223 Jackson St | Boston | MA | 02215 |
| 8 | 141450 | 166158 | 2019-01-12 | 01 | 12 | Bose SoundSport Headphones | 1 | 99.99 | 99.99 | 521 Park St, San Francisco, CA 94016 | 521 Park St | San Francisco | CA | 94016 |
| 9 | 141450 | 166158 | 2019-01-12 | 01 | 12 | Google Phone | 1 | 600.00 | 600.00 | 521 Park St, San Francisco, CA 94016 | 521 Park St | San Francisco | CA | 94016 |
# ADDING NEW COLUMNS TO THE 'multiple_item_orders' DATAFRAME TO GROUP ALL PRODUCTS FOR A PARTICULAR ORDER ID TOGETHER,
# NAMING IT ('all_products'), TOTALING THE QUANTITY OF PRODUCTS ('total_qty_ordered')
# AND TO SUM ORDER TOTALS ('complete_order_total')
multiple_item_orders['all_products'] = multiple_item_orders.sort_values(['product']).groupby('order_id')['product'].transform(lambda x: '/'.join(x))
multiple_item_orders['total_qty_ordered'] = multiple_item_orders.groupby('order_id')['qty_ordered'].transform(sum)
multiple_item_orders['complete_order_total'] = multiple_item_orders.groupby('order_id')['order_total'].transform(sum)
multiple_item_orders
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | all_products | total_qty_ordered | complete_order_total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable | 1 | 11.95 | 11.95 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 |
| 1 | 141275 | 180173 | 2019-01-07 | 01 | 07 | Wired Headphones | 1 | 11.99 | 11.99 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 |
| 2 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack) | 3 | 3.84 | 11.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 | AA Batteries (4-pack)/Apple Airpods Headphones | 4 | 161.52 |
| 3 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones | 1 | 150.00 | 150.00 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 | AA Batteries (4-pack)/Apple Airpods Headphones | 4 | 161.52 |
| 4 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone | 1 | 400.00 | 400.00 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 | Vareebadd Phone/Wired Headphones | 2 | 411.99 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 14123 | 319584 | 168610 | 2019-12-11 | 12 | 11 | iPhone | 1 | 700.00 | 700.00 | 537 Sunset St, Portland, OR 97035 | 537 Sunset St | Portland | OR | 97035 | Wired Headphones/iPhone | 2 | 711.99 |
| 14124 | 319596 | 152714 | 2019-12-01 | 12 | 01 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 | Lightning Charging Cable/iPhone | 2 | 714.95 |
| 14125 | 319596 | 152714 | 2019-12-01 | 12 | 01 | iPhone | 1 | 700.00 | 700.00 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 | Lightning Charging Cable/iPhone | 2 | 714.95 |
| 14126 | 319631 | 141320 | 2019-12-17 | 12 | 17 | 34in Ultrawide Monitor | 1 | 379.99 | 379.99 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 | 34in Ultrawide Monitor/Lightning Charging Cable | 2 | 394.94 |
| 14127 | 319631 | 141320 | 2019-12-17 | 12 | 17 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 | 34in Ultrawide Monitor/Lightning Charging Cable | 2 | 394.94 |
14128 rows × 17 columns
# CHANGING THE ORDER OF THE COLUMNS IN THE 'multiple_item_orders' DATAFRAME
# MOVING THE NEWLY CREATED COLUMNS AHEAD OF THE ADDRESS COLUMNS
multiple_item_orders = multiple_item_orders[['order_id','customer_id',
'order_date','month','day',
'product','all_products',
'qty_ordered','total_qty_ordered',
'price_each','order_total','complete_order_total',
'purchase_address','address',
'city','state','zip_code']]
multiple_item_orders
| order_id | customer_id | order_date | month | day | product | all_products | qty_ordered | total_qty_ordered | price_each | order_total | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable | USB-C Charging Cable/Wired Headphones | 1 | 2 | 11.95 | 11.95 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 1 | 141275 | 180173 | 2019-01-07 | 01 | 07 | Wired Headphones | USB-C Charging Cable/Wired Headphones | 1 | 2 | 11.99 | 11.99 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 2 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack) | AA Batteries (4-pack)/Apple Airpods Headphones | 3 | 4 | 3.84 | 11.52 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 3 | 141290 | 146916 | 2019-01-02 | 01 | 02 | Apple Airpods Headphones | AA Batteries (4-pack)/Apple Airpods Headphones | 1 | 4 | 150.00 | 150.00 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 4 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone | Vareebadd Phone/Wired Headphones | 1 | 2 | 400.00 | 400.00 | 411.99 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 14123 | 319584 | 168610 | 2019-12-11 | 12 | 11 | iPhone | Wired Headphones/iPhone | 1 | 2 | 700.00 | 700.00 | 711.99 | 537 Sunset St, Portland, OR 97035 | 537 Sunset St | Portland | OR | 97035 |
| 14124 | 319596 | 152714 | 2019-12-01 | 12 | 01 | Lightning Charging Cable | Lightning Charging Cable/iPhone | 1 | 2 | 14.95 | 14.95 | 714.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 |
| 14125 | 319596 | 152714 | 2019-12-01 | 12 | 01 | iPhone | Lightning Charging Cable/iPhone | 1 | 2 | 700.00 | 700.00 | 714.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 |
| 14126 | 319631 | 141320 | 2019-12-17 | 12 | 17 | 34in Ultrawide Monitor | 34in Ultrawide Monitor/Lightning Charging Cable | 1 | 2 | 379.99 | 379.99 | 394.94 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
| 14127 | 319631 | 141320 | 2019-12-17 | 12 | 17 | Lightning Charging Cable | 34in Ultrawide Monitor/Lightning Charging Cable | 1 | 2 | 14.95 | 14.95 | 394.94 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
14128 rows × 17 columns
# NOW THAT THE 'multiple_item_orders' DATAFRAME HAS COLUMNS THAT COMBINE ALL PRODUCTS ORDERED PER ORDER ID ALONG WITH
# QUANTITY ORDERED AND COMPLETE ORDER TOTALS, I WILL CLEAN UP THE 'multiple_item_orders' DATAFRAME BY REMOVING THE
# 'product','qty_ordered','price_each','order_total' COLUMNS (AS THEY ARE NO LONGER NECESSARY)
# AND DROPPING THE DUPLICATE ROWS BY ORDER ID
multiple_item_orders = multiple_item_orders.drop_duplicates(subset='order_id')
multiple_item_orders = multiple_item_orders.drop(columns=['product','qty_ordered','price_each','order_total'])
multiple_item_orders
| order_id | customer_id | order_date | month | day | all_products | total_qty_ordered | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 2 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack)/Apple Airpods Headphones | 4 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 4 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone/Wired Headphones | 2 | 411.99 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| 6 | 141384 | 119608 | 2019-01-03 | 01 | 03 | Google Phone/USB-C Charging Cable | 2 | 611.95 | 223 Jackson St, Boston, MA 02215 | 223 Jackson St | Boston | MA | 02215 |
| 8 | 141450 | 166158 | 2019-01-12 | 01 | 12 | Bose SoundSport Headphones/Google Phone | 2 | 699.99 | 521 Park St, San Francisco, CA 94016 | 521 Park St | San Francisco | CA | 94016 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 14118 | 319536 | 138588 | 2019-12-16 | 12 | 16 | Macbook Pro Laptop/Wired Headphones | 2 | 1711.99 | 346 Elm St, Seattle, WA 98101 | 346 Elm St | Seattle | WA | 98101 |
| 14120 | 319556 | 217093 | 2019-12-22 | 12 | 22 | Google Phone/Wired Headphones | 2 | 611.99 | 848 7th St, Austin, TX 73301 | 848 7th St | Austin | TX | 73301 |
| 14122 | 319584 | 168610 | 2019-12-11 | 12 | 11 | Wired Headphones/iPhone | 2 | 711.99 | 537 Sunset St, Portland, OR 97035 | 537 Sunset St | Portland | OR | 97035 |
| 14124 | 319596 | 152714 | 2019-12-01 | 12 | 01 | Lightning Charging Cable/iPhone | 2 | 714.95 | 436 14th St, New York City, NY 10001 | 436 14th St | New York City | NY | 10001 |
| 14126 | 319631 | 141320 | 2019-12-17 | 12 | 17 | 34in Ultrawide Monitor/Lightning Charging Cable | 2 | 394.94 | 363 Highland St, Austin, TX 73301 | 363 Highland St | Austin | TX | 73301 |
6879 rows × 13 columns
# NOW TO CREATE A DATAFRAME SHOWING THE COUNTS OF EACH UNIQUE VALUE IN THE 'all_products' COLUMN IN THE 'multiple_item_orders'
# DATAFRAME ALONG WITH THE TOTALS OF EACH
paired_products = multiple_item_orders.groupby('all_products').agg(Count=('all_products','count'),
Sum=('complete_order_total','sum')).reset_index().sort_values('Count',ascending=False)
paired_products
| all_products | Count | Sum | |
|---|---|---|---|
| 191 | Lightning Charging Cable/iPhone | 895 | 640747.35 |
| 165 | Google Phone/USB-C Charging Cable | 869 | 532668.85 |
| 210 | Wired Headphones/iPhone | 374 | 266667.94 |
| 201 | USB-C Charging Cable/Vareebadd Phone | 318 | 131298.85 |
| 170 | Google Phone/Wired Headphones | 311 | 190700.58 |
| ... | ... | ... | ... |
| 113 | Apple Airpods Headphones/Google Phone/USB-C Ch... | 1 | 761.95 |
| 109 | Apple Airpods Headphones/Bose SoundSport Headp... | 1 | 949.99 |
| 108 | Apple Airpods Headphones/Bose SoundSport Headp... | 1 | 849.99 |
| 100 | AAA Batteries (4-pack)/Lightning Charging Cabl... | 1 | 732.89 |
| 112 | Apple Airpods Headphones/Google Phone/Lightnin... | 1 | 1476.94 |
211 rows × 3 columns
# DOING A QUICK CHECK TO MAKE SURE THE DATA IN THE 'paired_products', 'multiple_item_orders', AND 'sales2019' DATAFRAME
# ARE CORRECT ACCROSS THEM
# FIRST, CHECK THE LAST 10 ROWS OF THE 'paired_products' DATAFRAME
paired_products.head(10)
| all_products | Count | Sum | |
|---|---|---|---|
| 191 | Lightning Charging Cable/iPhone | 895 | 640747.35 |
| 165 | Google Phone/USB-C Charging Cable | 869 | 532668.85 |
| 210 | Wired Headphones/iPhone | 374 | 266667.94 |
| 201 | USB-C Charging Cable/Vareebadd Phone | 318 | 131298.85 |
| 170 | Google Phone/Wired Headphones | 311 | 190700.58 |
| 128 | Apple Airpods Headphones/iPhone | 299 | 254300.00 |
| 131 | Bose SoundSport Headphones/Google Phone | 169 | 118898.31 |
| 206 | Vareebadd Phone/Wired Headphones | 110 | 45426.81 |
| 81 | AA Batteries (4-pack)/Lightning Charging Cable | 103 | 2173.19 |
| 184 | Lightning Charging Cable/USB-C Charging Cable | 96 | 2797.60 |
# NOW TO QUERY THE ROW WHERE 'all_products' EQUALS 'Google Phone/Wired Headphones/Macbook Pro Laptop' TO MAKE SURE THE SUM
# IS THE SAME FROM THE 'multiple_item_orders' DATAFRAME
multiple_item_orders.query('(all_products=="Google Phone/Wired Headphones")').head()
| order_id | customer_id | order_date | month | day | all_products | total_qty_ordered | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 46 | 141965 | 131948 | 2019-01-09 | 01 | 09 | Google Phone/Wired Headphones | 2 | 611.99 | 303 10th St, Boston, MA 02215 | 303 10th St | Boston | MA | 02215 |
| 64 | 142048 | 182732 | 2019-01-04 | 01 | 04 | Google Phone/Wired Headphones | 2 | 611.99 | 628 11th St, Portland, OR 97035 | 628 11th St | Portland | OR | 97035 |
| 88 | 142308 | 204485 | 2019-01-13 | 01 | 13 | Google Phone/Wired Headphones | 2 | 611.99 | 767 Sunset St, San Francisco, CA 94016 | 767 Sunset St | San Francisco | CA | 94016 |
| 102 | 142419 | 232603 | 2019-01-17 | 01 | 17 | Google Phone/Wired Headphones | 3 | 623.98 | 947 2nd St, New York City, NY 10001 | 947 2nd St | New York City | NY | 10001 |
| 136 | 142824 | 217533 | 2019-01-06 | 01 | 06 | Google Phone/Wired Headphones | 2 | 611.99 | 850 13th St, Seattle, WA 98101 | 850 13th St | Seattle | WA | 98101 |
# FINALLY, TO QUERY THE 'order_id' FROM THE 'sales_2019' DATAFRAME TO OBSERVE THE BREAKDOWN OF THE 'total_qty_ordered' AND
# 'complete_order_total' COLUMNS
sales_2019.query('(order_id == 141965)')
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 755 | 141965 | 131948 | 2019-01-09 | 01 | 09 | Google Phone | 1 | 600.00 | 600.00 | 303 10th St, Boston, MA 02215 | 303 10th St | Boston | MA | 02215 |
| 756 | 141965 | 131948 | 2019-01-09 | 01 | 09 | Wired Headphones | 1 | 11.99 | 11.99 | 303 10th St, Boston, MA 02215 | 303 10th St | Boston | MA | 02215 |
# CREATING A # CREATING A DATAFRAME FOR THE TOP 10 PAIRED PRODUCTS TO VISUALIZE
top_10_paired_products = paired_products.head(10)
top_10_paired_products
| all_products | Count | Sum | |
|---|---|---|---|
| 191 | Lightning Charging Cable/iPhone | 895 | 640747.35 |
| 165 | Google Phone/USB-C Charging Cable | 869 | 532668.85 |
| 210 | Wired Headphones/iPhone | 374 | 266667.94 |
| 201 | USB-C Charging Cable/Vareebadd Phone | 318 | 131298.85 |
| 170 | Google Phone/Wired Headphones | 311 | 190700.58 |
| 128 | Apple Airpods Headphones/iPhone | 299 | 254300.00 |
| 131 | Bose SoundSport Headphones/Google Phone | 169 | 118898.31 |
| 206 | Vareebadd Phone/Wired Headphones | 110 | 45426.81 |
| 81 | AA Batteries (4-pack)/Lightning Charging Cable | 103 | 2173.19 |
| 184 | Lightning Charging Cable/USB-C Charging Cable | 96 | 2797.60 |
top_10_paired_products['Sum'] = top_10_paired_products['Sum'].round()
top_10_paired_products
| all_products | Count | Sum | |
|---|---|---|---|
| 191 | Lightning Charging Cable/iPhone | 895 | 640747.0 |
| 165 | Google Phone/USB-C Charging Cable | 869 | 532669.0 |
| 210 | Wired Headphones/iPhone | 374 | 266668.0 |
| 201 | USB-C Charging Cable/Vareebadd Phone | 318 | 131299.0 |
| 170 | Google Phone/Wired Headphones | 311 | 190701.0 |
| 128 | Apple Airpods Headphones/iPhone | 299 | 254300.0 |
| 131 | Bose SoundSport Headphones/Google Phone | 169 | 118898.0 |
| 206 | Vareebadd Phone/Wired Headphones | 110 | 45427.0 |
| 81 | AA Batteries (4-pack)/Lightning Charging Cable | 103 | 2173.0 |
| 184 | Lightning Charging Cable/USB-C Charging Cable | 96 | 2798.0 |
# NOW TO VISUALIZE
top10 = px.bar(top_10_paired_products,x='all_products',y='Count',
color='all_products',
title='Top 10 Paired Products',
labels={'count':'Count','all_products':'Product Pairs'},
text='Sum',height=700)
top10.update_traces(textangle=0, textposition='outside')
top10.update_xaxes(showticklabels=False)
top10.show()
# CHECKING THE 'sales_2019' DATAFRAME
sales_2019.head()
| order_id | customer_id | order_date | month | day | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | 232266 | 2019-01-22 | 01 | 22 | iPhone | 1 | 700.00 | 700.00 | 944 Walnut St, Boston, MA 02215 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | 113519 | 2019-01-28 | 01 | 28 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 185 Maple St, Portland, OR 97035 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | 168669 | 2019-01-17 | 01 | 17 | Wired Headphones | 2 | 11.99 | 23.98 | 538 Adams St, San Francisco, CA 94016 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 199824 | 2019-01-05 | 01 | 05 | 27in FHD Monitor | 1 | 149.99 | 149.99 | 738 10th St, Los Angeles, CA 90001 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | 144894 | 2019-01-25 | 01 | 25 | Wired Headphones | 1 | 11.99 | 11.99 | 387 10th St, Austin, TX 73301 | 387 10th St | Austin | TX | 73301 |
# CHECKING THE 'multiple_item_orders' DATAFRAME
multiple_item_orders.head()
| order_id | customer_id | order_date | month | day | all_products | total_qty_ordered | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141275 | 180173 | 2019-01-07 | 01 | 07 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 2 | 141290 | 146916 | 2019-01-02 | 01 | 02 | AA Batteries (4-pack)/Apple Airpods Headphones | 4 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 4 | 141365 | 115880 | 2019-01-10 | 01 | 10 | Vareebadd Phone/Wired Headphones | 2 | 411.99 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| 6 | 141384 | 119608 | 2019-01-03 | 01 | 03 | Google Phone/USB-C Charging Cable | 2 | 611.95 | 223 Jackson St, Boston, MA 02215 | 223 Jackson St | Boston | MA | 02215 |
| 8 | 141450 | 166158 | 2019-01-12 | 01 | 12 | Bose SoundSport Headphones/Google Phone | 2 | 699.99 | 521 Park St, San Francisco, CA 94016 | 521 Park St | San Francisco | CA | 94016 |
# NOW TO GIVE EACH UNIQUE PRODUCT IN THE 'sales_2019' DATAFRAME AND EACH UNIQUE PRODUCT PAIR IN THE
# 'multiple_item_orders' DATAFRAME ITS OWN UNIQUE 'product_id','product_pair_id', RESPECTIVELY
sales_2019['product_id'] = sales_2019.groupby('product').ngroup()+1
multiple_item_orders['product_pair_id'] = multiple_item_orders.groupby('all_products').ngroup()+1
# CHANGING THE ORDER OF THE COLUMNS IN THE 'sales_2019' AND 'multiple_item_orders' DATAFRAME
# MOVING THE NEWLY CREATED 'product_id' AND 'product_pair_id' COLUMNS AHEAD OF THE PRODUCT AND ALL PRODUCT COLUMNS
sales_2019 = sales_2019[['order_id','customer_id','order_date',
'month','day','product_id','product',
'qty_ordered','price_each','order_total',
'purchase_address','address',
'city','state','zip_code']]
multiple_item_orders = multiple_item_orders[['order_id','customer_id','order_date',
'month','day','product_pair_id',
'all_products','total_qty_ordered',
'complete_order_total','purchase_address',
'address','city','state','zip_code']]
# CHECKING THE 'sales_2019' DATAFRAME
sales_2019.head(10)
| order_id | customer_id | order_date | month | day | product_id | product | qty_ordered | price_each | order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141234 | 232266 | 2019-01-22 | 01 | 22 | 19 | iPhone | 1 | 700.00 | 700.00 | 944 Walnut St, Boston, MA 02215 | 944 Walnut St | Boston | MA | 02215 |
| 1 | 141235 | 113519 | 2019-01-28 | 01 | 28 | 13 | Lightning Charging Cable | 1 | 14.95 | 14.95 | 185 Maple St, Portland, OR 97035 | 185 Maple St | Portland | OR | 97035 |
| 2 | 141236 | 168669 | 2019-01-17 | 01 | 17 | 18 | Wired Headphones | 2 | 11.99 | 23.98 | 538 Adams St, San Francisco, CA 94016 | 538 Adams St | San Francisco | CA | 94016 |
| 3 | 141237 | 199824 | 2019-01-05 | 01 | 05 | 3 | 27in FHD Monitor | 1 | 149.99 | 149.99 | 738 10th St, Los Angeles, CA 90001 | 738 10th St | Los Angeles | CA | 90001 |
| 4 | 141238 | 144894 | 2019-01-25 | 01 | 25 | 18 | Wired Headphones | 1 | 11.99 | 11.99 | 387 10th St, Austin, TX 73301 | 387 10th St | Austin | TX | 73301 |
| 5 | 141239 | 205768 | 2019-01-29 | 01 | 29 | 6 | AAA Batteries (4-pack) | 1 | 2.99 | 2.99 | 775 Willow St, San Francisco, CA 94016 | 775 Willow St | San Francisco | CA | 94016 |
| 6 | 141240 | 237633 | 2019-01-26 | 01 | 26 | 2 | 27in 4K Gaming Monitor | 1 | 389.99 | 389.99 | 979 Park St, Los Angeles, CA 90001 | 979 Park St | Los Angeles | CA | 90001 |
| 7 | 141241 | 112853 | 2019-01-05 | 01 | 05 | 16 | USB-C Charging Cable | 1 | 11.95 | 11.95 | 181 6th St, San Francisco, CA 94016 | 181 6th St | San Francisco | CA | 94016 |
| 8 | 141242 | 220222 | 2019-01-01 | 01 | 01 | 8 | Bose SoundSport Headphones | 1 | 99.99 | 99.99 | 867 Willow St, Los Angeles, CA 90001 | 867 Willow St | Los Angeles | CA | 90001 |
| 9 | 141243 | 187333 | 2019-01-22 | 01 | 22 | 7 | Apple Airpods Headphones | 1 | 150.00 | 150.00 | 657 Johnson St, San Francisco, CA 94016 | 657 Johnson St | San Francisco | CA | 94016 |
# CHECKING THE 'multiple_item_orders' DATAFRAME
multiple_item_orders.head(10)
| order_id | customer_id | order_date | month | day | product_pair_id | all_products | total_qty_ordered | complete_order_total | purchase_address | address | city | state | zip_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 141275 | 180173 | 2019-01-07 | 01 | 07 | 205 | USB-C Charging Cable/Wired Headphones | 2 | 23.94 | 610 Walnut St, Austin, TX 73301 | 610 Walnut St | Austin | TX | 73301 |
| 2 | 141290 | 146916 | 2019-01-02 | 01 | 02 | 73 | AA Batteries (4-pack)/Apple Airpods Headphones | 4 | 161.52 | 4 1st St, Los Angeles, CA 90001 | 4 1st St | Los Angeles | CA | 90001 |
| 4 | 141365 | 115880 | 2019-01-10 | 01 | 10 | 207 | Vareebadd Phone/Wired Headphones | 2 | 411.99 | 20 Dogwood St, New York City, NY 10001 | 20 Dogwood St | New York City | NY | 10001 |
| 6 | 141384 | 119608 | 2019-01-03 | 01 | 03 | 166 | Google Phone/USB-C Charging Cable | 2 | 611.95 | 223 Jackson St, Boston, MA 02215 | 223 Jackson St | Boston | MA | 02215 |
| 8 | 141450 | 166158 | 2019-01-12 | 01 | 12 | 132 | Bose SoundSport Headphones/Google Phone | 2 | 699.99 | 521 Park St, San Francisco, CA 94016 | 521 Park St | San Francisco | CA | 94016 |
| 10 | 141457 | 212963 | 2019-01-09 | 01 | 09 | 129 | Apple Airpods Headphones/iPhone | 2 | 850.00 | 820 Jackson St, Seattle, WA 98101 | 820 Jackson St | Seattle | WA | 98101 |
| 12 | 141478 | 132046 | 2019-01-26 | 01 | 26 | 112 | Apple Airpods Headphones/Google Phone | 2 | 750.00 | 303 North St, Atlanta, GA 30301 | 303 North St | Atlanta | GA | 30301 |
| 14 | 141550 | 146803 | 2019-01-31 | 01 | 31 | 128 | Apple Airpods Headphones/Wired Headphones/iPhone | 4 | 1011.99 | 399 Church St, Boston, MA 02215 | 399 Church St | Boston | MA | 02215 |
| 17 | 141587 | 163327 | 2019-01-29 | 01 | 29 | 123 | Apple Airpods Headphones/USB-C Charging Cable | 2 | 161.95 | 504 14th St, Dallas, TX 75001 | 504 14th St | Dallas | TX | 75001 |
| 19 | 141645 | 237729 | 2019-01-30 | 01 | 30 | 190 | Lightning Charging Cable/Wired Headphones | 2 | 26.94 | 98 Forest St, New York City, NY 10001 | 98 Forest St | New York City | NY | 10001 |